Setup


knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

# Load libraries
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
## TODO: Change paths
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th179"

# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/20221108_TsH_LSS_cisiabmix2_179/steinbock/masks_deepcell"

Read Inputs


First, we read in the results from the CISI analysis using a pre-defined A and the best parameters from the parameter sweep. For this we have data from simulated composite data from training, from the actual experiment and the decomposed data.

The channels from the experiment were decomposed using the dictionary U with the best predicted performance (from the run using ‘min strategy’ for A) for a 8 to 6 compression. This dictionary came from the run with the parameters: k=2, d=10 and m=6.

## Read results
# Read in all results into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.df <- lapply(results.files, read_results, type="res", voi="comparison") %>% 
  bind_rows() %>%
  dplyr::select(-c("dataset", "training", "datasize"))


## Read X_test, X_simulated and X_decomposed 
# Specify files to be read in
X_results.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. if it is the ground truth or simulated X and if it comes from training simulations,
# experiment simulation or the acutal decomposed experiment)
X_results.list <- lapply(X_results.files, read_results, type="x", voi="comparison")
X_results.list <- lapply(X_results.list, function(sce.temp){
  metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training)) 
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read anndata object containing composite measurements
X_simulation.files <- list.files(analysis.path, "composite_measurements.h5ad", 
                             full.names=TRUE, recursive=TRUE)
X_simulation.list <- lapply(X_simulation.files, read_results, type="x", use_voi=FALSE)
# Remove non-composite channels, add if data comes form simulated or real composite 
# measurements and add arcsinh transformed counts
X_simulation.list <- lapply(1:length(X_simulation.list), function(i){
  sce.temp <- X_simulation.list[[i]]
  sce.temp <- sce.temp[grepl("CC\\d_", rownames(sce.temp)), ]
  metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training))
  metadata(sce.temp)$ground_truth <- ifelse(grepl("simulated", X_simulation.files[i]),
                                            "simulated", "ground_truth")
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read U
u.files <- list.files(analysis.path, "gene_modules",
                      full.names=TRUE, recursive=TRUE)
u.files <- u.files[!grepl("min", u.files)]
u <- read_single_U(u.files)


## Read A
a.files <- list.files(analysis.path, "version_*",
                      full.names=TRUE, recursive=TRUE)
a <- read_single_A(a.files) 


# Read in masks 
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

Results


Barplot of Results

The barplot shows the different measurements used to evaluate decomposition of CISI with the specified U, A and parameters.

There are three different results: training (decomposed simulated data from training compared to ground truth training data), experiment-simulation (decomposed simulated data from experiment data compared to ground truth experiment data) and experiment (decomposed real composite measurements from experiment compared to ground truth experiment data).

# For each different measurement of training results, plot barplot 
# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("version")) %>%
  pivot_longer(!c("simulation", "comparison"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "comparison")
print(results.barplot)

Simulated vs Real Composite Measurements


Images

Here we show the composite measurements vs the simulated composite measurements for all images.

# Find the names of all composite channels
pois <- rownames(X_simulation.list[[1]])
names(pois) <- rownames(X_simulation.list[[1]])


# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_simulation.list, masks, pois[[n]], layer="exprs", display="all")
  
  # Plot simulated vs true composite measurements  
  img <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_simulation.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=15, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img)
  cat("\n\n")
}

CC0_panCK_Ki-67

CC1_CD4

CC2_SMA_CD20

CC3_CD3

CC4_CD11c

CC5_Ki-67_CD8a

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_simulation.list)

Decomposed vs Ground truth Measurements


Images

Here we show the decomposed measurements vs the ground truth measurements for all images.

# Subset list to decomposed and ground truth expressions
X_decomposition.list <- keep(X_results.list, function(x){
  metadata(x)$comparison=="experiment"})
# Find the names of all protein channels
pois <- rownames(X_decomposition.list[[1]])
names(pois) <- rownames(X_decomposition.list[[1]])


# Call plot_cells to get individual plots for all rois for decomposed and real
# measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_decomposition.list, masks, pois[[n]], layer="exprs", display="all")

  # Plot decomposed vs true results 
  img <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_decomposition.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=15, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img)
  cat("\n\n")
}

SMA

CD3

CD20

CD8a

CD11c

CD4

panCK

Ki-67

Correlations per Protein (arcsinh)

Correlation between ground truth and decomposed results per protein for
arcsinh transformed counts.

aoi <- "exprs"
# Calculate correlations between ground truth and decomposed data for each protein
X.cor <- lapply(X_decomposition.list, function(sce){
  counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable)
}) %>% bind_rows() %>%
  group_by(protein, cell) %>%
  summarise_all(na.omit) %>%
  group_by(protein) %>%
  summarise(correlation=cor(ground_truth, decomposed))

# Plot correlations per protein
proteinCorr <- plot_protein_cor(X.cor) + ylim(0, 1)
print(proteinCorr)

Training vs Experiment


Scatterplot of Correlations per Protein (arcsinh)

Scatterplot of correlations between ground truth and simulated or decomposed results per protein for arcsinh transformed counts .

aoi <- "exprs"

# Calculate correlations between ground truth and simulated data for each protein
X.cor <- X.cor %>% 
  dplyr::rename(correlation_decomposition=correlation) %>%
  merge(lapply(keep(X_results.list, function(x){
    metadata(x)$comparison=="training"}), 
    function(sce){
      counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
        mutate(protein=rownames(.)) %>%
        melt() %>%
        dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                      cell=variable) 
      
    }) %>% bind_rows() %>%
      group_by(protein, cell) %>%
      summarise_all(na.omit) %>%
      group_by(protein) %>%
      summarise(correlation_training=cor(ground_truth, simulated)))


# Plot correlations for each protein
correlations.plot <- ggscatter(X.cor,
                               x="correlation_training", 
                               y="correlation_decomposition",
                               label="protein",
                               label.rectangle=TRUE,
                               color=pal_npg("nrc")("1"),
                               add.params=list(color=pal_npg("nrc")("4")[4],
                                               size=2)) +
  theme_cowplot(title.fontsize) +
  geom_abline(slope=1, linetype="dashed") +
  stat_cor(size=2) +
  ylim(0, 1) +
  xlim(0, 1)

print(correlations.plot)

Matrix designs


U

# Plot U 
u.plot <- plot_single_U(u, paste0("U\n(dictionary size: ", length(unique(u$module)), ")"))
print(u.plot)

A

# Plot A 
a.plot <- plot_single_A(a, "A")
print(a.plot)